/*******************************************************************************

  Paying Outsourced Labor: Direct Evidence from Linked Temp Agency-Worker-Client Data

  By Andres Drenik, Simon Jäger, Pascuel Plotkin and Benjamin Schoefer
  January 7th, 2021

	DESCRIPTION: Creates Appendix Table A.2

*******************************************************************************/




/********************************************************************************
***** Preliminaries
********************************************************************************/
set more off
cap log close
local curr_date = c(current_date)
log using "${logs}/TabA2`curr_date'", replace


/****************************************************************************************
* Looping over the 2010 - 2017 filtered datasets, to generate one unique dataset
****************************************************************************************/

forvalues y = 2010/2017{

	use "${Data_with_filter}/Argentina_Clean_`y'.dta", clear

/****************************************************************************************
* Drop useless observations for our analysis
	* Drop duplicated observations for temp workers (modalidad = 102)
	* Only keep private sector workers
	* Only keep workers that have 1 user firm per month
****************************************************************************************/

	drop if modalidad == 102
	drop if public_worker == 1
	drop if multiple_user_firm == 1
	drop if base_registro_match == 2

/****************************************************************************************
* Drop useless variables
****************************************************************************************/

	capture drop base_registro_match
	capture drop multiple_user_firm
	capture drop cuit_user_firm2
	capture drop cuit_user_firm3
	capture drop cuit_user_firm4
	capture drop cuit_user_firm5

*************

	* Rename variable that flags the user firm
	rename cuit_user_firm1 cuit_user_firm

	* Generate 2 digit industry code
	gen industry_code_2digit=int(ciiu_4/100)

	*Drop observations that don't register a wage
	drop if remuner_total == .

    *Flag Temp Worker per date
	bys cuil_trab date temp_worker (remuner_total) : gen worker_user_firm = (_n == _N & temp_worker == 1)
	gen total_obs = 1

	bys cuil_trab date (remuner_total): gen worker = (_n == _N)

    *Flag Temp workers in Part time Jobs
	gen part_time = (modalidad == 1 | modalidad == 21 | modalidad == 304 | modalidad == 305 | modalidad == 306)
	bys cuil_trab date : gegen worker_part_time = sum(part_time)
    gen temp_worker_part_time = (worker_part_time > 0 & worker_user_firm == 1)

    *Flag temp workers that are multiple job holders
	duplicates tag cuil_trab date, gen(duplicate)
	gen temp_worker_multiple_job = (duplicate > 0 & worker_user_firm == 1)

	*Tag workers in a certain period in which they are also flagged as temp workers
	gegen temp_work_period = sum(temp_worker), by(cuil_trab date)
	replace temp_work_period = 1 if temp_work_period>=1
	replace temp_work_period = 0 if temp_work_period!=1

	*tag workers with reg jobs and temp jobs in the same period
	gen multiple_job_aux = (temp_work_period == 1 & temp_worker == 0)
	drop temp_work_period

    *Multiple job workers who's main wage comes from temp work
	gegen multiple_job_reg_temp_obs = sum(multiple_job_aux), by(cuil_trab date)
	replace multiple_job_reg_temp_obs = 1 if multiple_job_reg_temp_obs >= 1
	replace multiple_job_reg_temp_obs = 0 if multiple_job_reg_temp_obs != 1
	drop multiple_job_aux
	gen multiple_job_reg_temp = (multiple_job_reg_temp_obs == 1 & worker_user_firm == 1)
	bys cuil_trab date (remuner_total) : gen multiple_main_wage_temp = (multiple_job_reg_temp_obs == 1 & _n == _N & temp_worker == 1)
	drop multiple_job_reg_temp_obs

	*Gender
	gen female = (gender == 2 & worker_user_firm == 1)
	gen male = (gender == 3 & worker_user_firm == 1)

	*Preparation of variables for the gcollapse
	foreach i of var worker_user_firm {

	    *Median and Mean Age
		qui sum age if `i' ==1, detail
		local mean_age_`i' = r(mean)
		local median_age_`i' = r(p50)
		gen mean_age_`i' = `mean_age_`i''
		gen median_age_`i' =`median_age_`i''

	    *Gender shares
		sum gender if gender == 2 & `i' == 1
		local female_`i' = r(sum_w)
		qui sum `i' if `i' == 1
		local total_`i' = r(sum_w)
		local perc_female_`i' = `female_`i''/`total_`i''
		local perc_male_`i' = 1 - `perc_female_`i''
		gen perc_female_`i' = `perc_female_`i''
		gen perc_male_`i' = `perc_male_`i''

	    *Mean and Median Wages in Pesos
		qui sum remuner_total if `i' ==1, detail
		local mean_wage_`i' = r(mean)
		local median_wage_`i' = r(p50)
		gen mean_wage_`i' = `mean_wage_`i''
		gen median_wage_`i' =`median_wage_`i''

	}

	gcollapse (mean) year perc_* mean_* median_* (sum) worker worker_user_firm total_obs temp_worker temp_worker_multiple_job multiple_job_reg_temp temp_worker_part_time multiple_main_wage_temp male female, by(date UF_industry_code_2digit) fast
	save "${intermediate_data_emp}/kk_table3_`y'.dta", replace

}

*************************************
*Append and variables by industry letter
*************************************

	use "${intermediate_data_emp}/kk_table3_2010.dta", clear
	forvalues y = 2011/2017{
		append using "${intermediate_data_emp}/kk_table3_`y'.dta"
	}

	save "${intermediate_data_emp}/kk_table3.dta", replace

	*labels
	gen industry_code_letter = 1 if inrange(UF_industry_code_2digit, 1, 5)
	replace industry_code_letter = 2 if inrange(UF_industry_code_2digit, 10, 14)
	replace industry_code_letter = 3 if inrange(UF_industry_code_2digit, 40, 41)
	replace industry_code_letter = 4 if UF_industry_code_2digit == 45
	replace industry_code_letter = 5 if inrange(UF_industry_code_2digit, 15, 37)
	replace industry_code_letter = 6 if UF_industry_code_2digit == 51
	replace industry_code_letter = 7 if UF_industry_code_2digit == 50 | UF_industry_code_2digit == 52
	replace industry_code_letter = 8 if inrange(UF_industry_code_2digit, 60, 64)

	replace industry_code_letter = 10 if inrange(UF_industry_code_2digit, 65, 67)
	replace industry_code_letter = 11 if inrange(UF_industry_code_2digit, 70, 74)
	replace industry_code_letter = 12 if UF_industry_code_2digit == 80 | UF_industry_code_2digit == 85
	replace industry_code_letter = 13 if UF_industry_code_2digit == 55
	replace industry_code_letter = 14 if inrange(UF_industry_code_2digit, 90, 93) | UF_industry_code_2digit==99

	replace industry_code_letter = 16 if UF_industry_code_2digit == 75
	replace industry_code_letter = 17 if inrange(UF_industry_code_2digit, 95, 97)
	replace industry_code_letter = 18 if UF_industry_code_2digit == 0 | UF_industry_code_2digit == .

	*RETAIL TRADE INCLUDES CAR TRADES
	label define industry 1 "Agriculture, Foresty, Fishing and Hunting" 2 "Mining" 3 "Utilities" 4 "Construction" 5 "Manufacturing" 6 "Wholsale Trade" 7 "Retail Trade" 8 "Transportation Warehousing and comunication" 9 "Information" 10 "Financial activities" 11 "Professional and Business Services" 12 "Education and Health Services" 13 "Leisure and Hospitality" 14 "Other Services (Excluding Public Administration)" 15 "Public Administration" 16 "Temporary work agents" 17 "Activities of private households as employers and undifferentiated production activities of private households" 18 "Other (public administration?)", modify
	label values industry_code_letter industry

	egen workers_industry_letter_UF = sum(worker_user_firm), by(industry_code_letter)

	*workers by industry
	egen workers_industry_UF_year = sum(worker_user_firm), by(industry_code_letter year)
	egen total_workers_industry_UF_year = sum(worker_user_firm), by(year)
  	egen avg_worker_year_UF = total_workers_industry_UF_year/12
	gen perc_workers_industry_UF_year = workers_industry_UF_year / total_workers_industry_UF_year

	*Merge with inflation dataset
	merge m:1 year using "${input}/ArgentinaPriceLevel.dta"
	drop if _merge==2
	drop _merge

	sum index if year == 2017
	local index_2017 = r(mean)

	*Inflate wages to 2017
	gen coefficient_for_real_wage = (`index_2017'/index)
	gen mean_real_wage_2017 = coefficient_for_real_wage * mean_remuner_total
	gen median_real_wage_2017 = coefficient_for_real_wage * median_remuner_total

	*Average exchange rate for the last two years in Argentina (2017-2016)
	local average_exchange_rate = 21.39

	*Wages in dollars
	gen median_wage_usd_user_firm = median_real_wage_2017_user_firm/`average_exchange_rate'
	gen mean_wage_usd_user_firm = mean_real_wage_2017_user_firm/`average_exchange_rate'

	egen multiple_job_year = sum(temp_worker_multiple_job), by(year)
	egen part_time_year = sum(temp_worker_part_time), by(year)
	egen total_tempworker_year = sum(worker_user_firm), by(year)
	gen multiple_job_percent = multiple_job_year / total_tempworker_year
	gen part_time_percent = part_time_year / total_tempworker_year

	egen total_obs_year = sum(total_obs), by(year)

	save "${intermediate_data_emp}/kk_table3.dta", replace

	*********************
	******* TABLE *******
	*********************

	file open table3_kk using "${Results}\table3_kk.tex", write replace
	file write table3_kk "\begin{table}[]" _n
  	file write table3_kk "\begin{tabular}{lcccccc}" _n "\hline" _n
  	file write table3_kk "& \multicolumn{3}{c}{SIPA Dataset} & \multicolumn{3}{c}{US Survey (Katz \& Krueger)} \\ \hline" _n
  	file write table3_kk "& \multicolumn{1}{l}{} & \multicolumn{1}{l}{} & \multicolumn{1}{l}{} & CPS & Weighted & Alt. Weight \\ \cline{2-7}" _n
	file write table3_kk "(Average for workers in user firms during each year) & 2011 & 2015 & 2017 & 2005 & \multicolumn{2}{c}{2015} \\ \cline{2-7} " _n

	rename mean_wage_worker_user_firm mean_wage_worker_UF
	rename median_wage_worker_user_firm median_wage_worker_UF
	rename median_age_worker_user_firm median_age_worker_UF
	rename mean_age_worker_user_firm mean_age_worker_UF
	rename median_wage_usd_user_firm median_wage_usd_UF
	rename mean_wage_usd_user_firm mean_wage_usd_UF

	local varlist mean_wage_worker_UF median_age_worker_UF mean_age_worker_UF median_wage_usd_UF mean_wage_usd_UF
	foreach x of local varlist {
		foreach i of numlist 2011 2015 2017{
				qui sum `x' if year == `i'
				local `x'_`i' = round(r(mean), .1)
				local `x'_`i' = (substr(string(``x'_`i'',"%6.0f"),1,6))
		}
	}

	foreach i of numlist 2011 2015 2017 {
		qui sum perc_female_worker_user_firm if year == `i'
		local perc_female_`i' = round(r(mean)*100, .1)
		local perc_female_`i' = (substr(string(`perc_female_`i'',"%6.1f"),1,5))
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum part_time_percent if year == `i'
		local part_time_percent_`i' = round(r(mean)*100, .1)
		local part_time_percent_`i' = (substr(string(`part_time_percent_`i'',"%6.1f"),1,5))
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum multiple_job_percent if year == `i'
		local multiple_job_percent_`i' = round(r(mean)*100, .1)
		local multiple_job_percent_`i' = (substr(string(`multiple_job_percent_`i'',"%6.1f"),1,5))
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum avg_worker_year_UF if year == `i'
		local avg_worker_year_UF_`i' = r(mean)
		local avg_worker_year_UF_`i' = (substr(string(`avg_worker_year_UF_`i'',"%15.0f"),1,15))
		dis `avg_worker_year_UF_`i''
	}

	file write table3_kk "Median Age (years) & `median_age_worker_UF_2011' & `median_age_worker_UF_2015' & \multicolumn{1}{r|}{`median_age_worker_UF_2017'} & 44 & 47 & 45 \\" _n
	file write table3_kk "Mean Age (years) & `mean_age_worker_UF_2011' & `mean_age_worker_UF_2015' & \multicolumn{1}{r|}{`mean_age_worker_UF_2017'} & 44.0 & 46.8 & 46.5 \\" _n
	file write table3_kk "Median Wage (dollars) & `median_wage_usd_UF_2011' & `median_wage_usd_UF_2015' & \multicolumn{1}{r|}{`median_wage_usd_UF_2017'} &  &  &  \\" _n
  	file write table3_kk "Mean Wage (dollars) & `mean_wage_usd_UF_2011' & `mean_wage_usd_UF_2015' & \multicolumn{1}{r|}{`mean_wage_usd_UF_2017'} &  &  &  \\" _n
  	file write table3_kk "Female (percent) & `perc_female_2011' & `perc_female_2015' & \multicolumn{1}{r|}{`perc_female_2017'} & 38.6 & 50.4 & 50.8 \\" _n
	file write table3_kk "Multiple Jobholder & `multiple_job_percent_2011' & `multiple_job_percent_2014' & \multicolumn{1}{c|}{`multiple_job_percent_2017'} & 7.4 & 32.0 & 33.0 \\" _n
	file write table3_kk "Part-Time Employment & `part_time_percent_2011' & `part_time_percent_2014' & \multicolumn{1}{c|}{`part_time_percent_2017'} & 35.2 & 47.7 & 46.2 \\" _n
	file write table3_kk "\textbf{Industry (percent):} &  &  & \multicolumn{1}{c|}{} &  &  &  \\" _n

	forvalues x = 1/18 {
		foreach i of numlist 2011 2015 2017 {
			qui sum perc_workers_industry_UF_year if industry_code_letter == `x' & year == `i'
			local `x'_`i' = round(r(mean)*100, .1)
			local `x'_`i' = (substr(string(``x'_`i'',"%6.1f"),1,5))
		}
	}

	file write table3_kk "Agriculture, Forestry, Fishing and Hunting & `1_2008' & `1_2011' & `1_2015' & \multicolumn{1}{r|}{`1_2017'} & 1.5 & 4.4 & 4.1 \\" _n
	file write table3_kk "Mining & `2_2011' & `2_2015' & \multicolumn{1}{r|}{`2_2017'} & 0.3 & 0.5 & 0.5 \\" _n
	file write table3_kk "Utilities & `3_2011' & `3_2015' & \multicolumn{1}{r|}{`3_2017'} & 0.5 & 0.3 & 0.4 \\" _n
	file write table3_kk "Construction & `4_2011' & `4_2015' & \multicolumn{1}{r|}{`4_2017'} & 18.0 & 7.0 & 6.7 \\" _n
	file write table3_kk "Manufacturing & `5_2011' & `5_2015' & \multicolumn{1}{r|}{`5_2017'} & 4.7 & 5.9 & 6.2 \\" _n
	file write table3_kk "Wholsale Trade & `6_2011' & `6_2015' & \multicolumn{1}{r|}{`6_2017'} & 2.3 & 0.6 & 0.7 \\" _n
	file write table3_kk "Retail Trade & `7_2011' & `7_2015' & \multicolumn{1}{r|}{`7_2017'} & 7.1 & 6.3 & 6.4 \\" _n
	file write table3_kk "Transportation Warehousing and communication & `8_2011' & `8_2015' & \multicolumn{1}{r|}{`8_2017'} & 6.4 & 9.0 & 9.2 \\" _n
	file write table3_kk "Financial activities & `10_2011' & `10_2015' & \multicolumn{1}{r|}{`10_2017'} & 7.8 & 6.4 & 6.2 \\" _n
	file write table3_kk "Professional and Business Services & `11_2011' & `11_2015' & \multicolumn{1}{r|}{`11_2017'} & 23.4 & 20.7 & 20.6 \\" _n
	file write table3_kk "Education and Health Services & `12_2011' & `12_2015' & \multicolumn{1}{r|}{`12_2017'} & 13.9 & 21.9 & 22.3 \\" _n
	file write table3_kk "Leisure and Hospitality & `13_2011' & `13_2015' & \multicolumn{1}{r|}{`13_2017'} & 5.1 & 4.7 & 4.6 \\" _n
	file write table3_kk "Other Services (Excluding Public Administration) & `14_2011' & `14_2015' & \multicolumn{1}{r|}{`14_2017'} & 7.7 & 7.4 & 7.1 \\" _n
	file write table3_kk " &  &  & \multicolumn{1}{c|}{} &  &  &  \\" _n
	file write table3_kk "\textbf{Avg. Workers} & 40,227 & 20,981 & \multicolumn{1}{c|}{21,227} &  &  & \\ \hline" _n
	file write table3_kk "\end{tabular}" _n
	file write table3_kk "\end{table}"_n

	file close table3_kk

log close
